{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "dn-6c02VmqiN"
   },
   "outputs": [],
   "source": [
    "# In this exercise you will train a CNN on the FULL Cats-v-dogs dataset\n",
    "# This will require you doing a lot of data preprocessing because\n",
    "# the dataset isn't split into training and validation for you\n",
    "# This code block has all the required inputs\n",
    "import os\n",
    "import zipfile\n",
    "import random\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.optimizers import RMSprop\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
    "from shutil import copyfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "3sd9dQWa23aj"
   },
   "outputs": [],
   "source": [
    "# This code block downloads the full Cats-v-Dogs dataset and stores it as \n",
    "# cats-and-dogs.zip. It then unzips it to /tmp\n",
    "# which will create a tmp/PetImages directory containing subdirectories\n",
    "# called 'Cat' and 'Dog' (that's how the original researchers structured it)\n",
    "# If the URL doesn't work, \n",
    "# .   visit https://www.microsoft.com/en-us/download/confirmation.aspx?id=54765\n",
    "# And right click on the 'Download Manually' link to get a new URL\n",
    "\n",
    "!wget --no-check-certificate \\\n",
    "    \"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip\" \\\n",
    "    -O \"/tmp/cats-and-dogs.zip\"\n",
    "\n",
    "local_zip = '/tmp/cats-and-dogs.zip'\n",
    "zip_ref = zipfile.ZipFile(local_zip, 'r')\n",
    "zip_ref.extractall('/tmp')\n",
    "zip_ref.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "gi3yD62a6X3S"
   },
   "outputs": [],
   "source": [
    "print(len(os.listdir('/tmp/PetImages/Cat/')))\n",
    "print(len(os.listdir('/tmp/PetImages/Dog/')))\n",
    "\n",
    "# Expected Output:\n",
    "# 12501\n",
    "# 12501"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "F-QkLjxpmyK2"
   },
   "outputs": [],
   "source": [
    "# Use os.mkdir to create your directories\n",
    "# You will need a directory for cats-v-dogs, and subdirectories for training\n",
    "# and testing. These in turn will need subdirectories for 'cats' and 'dogs'\n",
    "try:\n",
    "    #YOUR CODE GOES HERE\n",
    "except OSError:\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "zvSODo0f9LaU"
   },
   "outputs": [],
   "source": [
    "# Write a python function called split_data which takes\n",
    "# a SOURCE directory containing the files\n",
    "# a TRAINING directory that a portion of the files will be copied to\n",
    "# a TESTING directory that a portion of the files will be copie to\n",
    "# a SPLIT SIZE to determine the portion\n",
    "# The files should also be randomized, so that the training set is a random\n",
    "# X% of the files, and the test set is the remaining files\n",
    "# SO, for example, if SOURCE is PetImages/Cat, and SPLIT SIZE is .9\n",
    "# Then 90% of the images in PetImages/Cat will be copied to the TRAINING dir\n",
    "# and 10% of the images will be copied to the TESTING dir\n",
    "# Also -- All images should be checked, and if they have a zero file length,\n",
    "# they will not be copied over\n",
    "#\n",
    "# os.listdir(DIRECTORY) gives you a listing of the contents of that directory\n",
    "# os.path.getsize(PATH) gives you the size of the file\n",
    "# copyfile(source, destination) copies a file from source to destination\n",
    "# random.sample(list, len(list)) shuffles a list\n",
    "def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE):\n",
    "# YOUR CODE STARTS HERE\n",
    "# YOUR CODE ENDS HERE\n",
    "\n",
    "\n",
    "CAT_SOURCE_DIR = \"/tmp/PetImages/Cat/\"\n",
    "TRAINING_CATS_DIR = \"/tmp/cats-v-dogs/training/cats/\"\n",
    "TESTING_CATS_DIR = \"/tmp/cats-v-dogs/testing/cats/\"\n",
    "DOG_SOURCE_DIR = \"/tmp/PetImages/Dog/\"\n",
    "TRAINING_DOGS_DIR = \"/tmp/cats-v-dogs/training/dogs/\"\n",
    "TESTING_DOGS_DIR = \"/tmp/cats-v-dogs/testing/dogs/\"\n",
    "\n",
    "split_size = .9\n",
    "split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)\n",
    "split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)\n",
    "\n",
    "# Expected output\n",
    "# 666.jpg is zero length, so ignoring\n",
    "# 11702.jpg is zero length, so ignoring"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "luthalB76ufC"
   },
   "outputs": [],
   "source": [
    "print(len(os.listdir('/tmp/cats-v-dogs/training/cats/')))\n",
    "print(len(os.listdir('/tmp/cats-v-dogs/training/dogs/')))\n",
    "print(len(os.listdir('/tmp/cats-v-dogs/testing/cats/')))\n",
    "print(len(os.listdir('/tmp/cats-v-dogs/testing/dogs/')))\n",
    "\n",
    "# Expected output:\n",
    "# 11250\n",
    "# 11250\n",
    "# 1250\n",
    "# 1250"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "-BQrav4anTmj"
   },
   "outputs": [],
   "source": [
    "# DEFINE A KERAS MODEL TO CLASSIFY CATS V DOGS\n",
    "# USE AT LEAST 3 CONVOLUTION LAYERS\n",
    "model = tf.keras.models.Sequential([\n",
    "# YOUR CODE HERE\n",
    "])\n",
    "\n",
    "model.compile(optimizer=RMSprop(lr=0.001), loss='binary_crossentropy', metrics=['acc'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "mlNjoJ5D61N6"
   },
   "outputs": [],
   "source": [
    "TRAINING_DIR = #YOUR CODE HERE\n",
    "train_datagen = #YOUR CODE HERE\n",
    "train_generator = #YOUR CODE HERE\n",
    "\n",
    "VALIDATION_DIR = #YOUR CODE HERE\n",
    "validation_datagen = #YOUR CODE HERE\n",
    "validation_generator = #YOUR CODE HERE\n",
    "\n",
    "\n",
    "\n",
    "# Expected Output:\n",
    "# Found 22498 images belonging to 2 classes.\n",
    "# Found 2500 images belonging to 2 classes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "KyS4n53w7DxC"
   },
   "outputs": [],
   "source": [
    "history = model.fit_generator(train_generator,\n",
    "                              epochs=2,\n",
    "                              verbose=1,\n",
    "                              validation_data=validation_generator)\n",
    "\n",
    "# The expectation here is that the model will train, and that accuracy will be > 95% on both training and validation\n",
    "# i.e. acc:A1 and val_acc:A2 will be visible, and both A1 and A2 will be > .9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "MWZrJN4-65RC"
   },
   "outputs": [],
   "source": [
    "# PLOT LOSS AND ACCURACY\n",
    "%matplotlib inline\n",
    "\n",
    "import matplotlib.image  as mpimg\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "#-----------------------------------------------------------\n",
    "# Retrieve a list of list results on training and test data\n",
    "# sets for each training epoch\n",
    "#-----------------------------------------------------------\n",
    "acc=history.history['acc']\n",
    "val_acc=history.history['val_acc']\n",
    "loss=history.history['loss']\n",
    "val_loss=history.history['val_loss']\n",
    "\n",
    "epochs=range(len(acc)) # Get number of epochs\n",
    "\n",
    "#------------------------------------------------\n",
    "# Plot training and validation accuracy per epoch\n",
    "#------------------------------------------------\n",
    "plt.plot(epochs, acc, 'r', \"Training Accuracy\")\n",
    "plt.plot(epochs, val_acc, 'b', \"Validation Accuracy\")\n",
    "plt.title('Training and validation accuracy')\n",
    "plt.figure()\n",
    "\n",
    "#------------------------------------------------\n",
    "# Plot training and validation loss per epoch\n",
    "#------------------------------------------------\n",
    "plt.plot(epochs, loss, 'r', \"Training Loss\")\n",
    "plt.plot(epochs, val_loss, 'b', \"Validation Loss\")\n",
    "\n",
    "\n",
    "plt.title('Training and validation loss')\n",
    "\n",
    "# Desired output. Charts with training and validation metrics. No crash :)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "LqL6FYUrtXpf"
   },
   "outputs": [],
   "source": [
    "# Here's a codeblock just for fun. You should be able to upload an image here \n",
    "# and have it classified without crashing\n",
    "\n",
    "import numpy as np\n",
    "from google.colab import files\n",
    "from keras.preprocessing import image\n",
    "\n",
    "uploaded = files.upload()\n",
    "\n",
    "for fn in uploaded.keys():\n",
    " \n",
    "  # predicting images\n",
    "  path = '/content/' + fn\n",
    "  img = image.load_img(path, target_size=(# YOUR CODE HERE))\n",
    "  x = image.img_to_array(img)\n",
    "  x = np.expand_dims(x, axis=0)\n",
    "\n",
    "  images = np.vstack([x])\n",
    "  classes = model.predict(images, batch_size=10)\n",
    "  print(classes[0])\n",
    "  if classes[0]>0.5:\n",
    "    print(fn + \" is a dog\")\n",
    "  else:\n",
    "    print(fn + \" is a cat\")"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "Exercise 5 - Question.ipynb",
   "provenance": [
    {
     "file_id": "1dGHF5_gCd_iW3blaWG-kNce153CHvM-I",
     "timestamp": 1550529081161
    }
   ],
   "version": "0.3.2"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
